Setup
Parameters
print(params)
## $gene.metadata.path
## [1] "../../data/gene_metadata"
##
## $input.samples.rds.path
## [1] "../../results/01_Filter_Samples"
##
## $input.variability.rds.path
## [1] "../../results/02_Variability_Jackknife"
##
## $output.rds.path
## [1] "../../results/03_Variability_Plots"
##
## $species
## [1] "LOC"
##
## $species.name
## [1] "Lepisosteus oculatus"
##
## $min.percentile
## [1] 0
##
## $max.percentile
## [1] 0.95
##
## $all.nonzero.matrix
## [1] FALSE
Functions
source("../functions/helper_functions.R")
source("../functions/variability_jackknife.R")
source("../functions/variability_plots.R")
## Warning: package 'ggpubr' was built under R version 4.1.3
## Warning: package 'cowplot' was built under R version 4.1.3
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggpubr':
##
## get_legend
Session Info
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur 11.7.8
##
## Matrix products: default
## LAPACK: /Users/cbucao/miniforge3/envs/fish-ev-gene-function/lib/libopenblasp-r0.3.25.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats graphics grDevices datasets utils methods base
##
## other attached packages:
## [1] cowplot_1.1.1 ggpubr_0.4.0 slider_0.2.2 viridis_0.6.3 viridisLite_0.4.1
## [6] gplots_3.1.3 ggplot2_3.3.6 matrixStats_0.62.0 edgeR_3.34.1 limma_3.48.3
## [11] colorBlindness_0.1.9 dplyr_1.0.9
##
## loaded via a namespace (and not attached):
## [1] sass_0.4.6 tidyr_1.2.1 jsonlite_1.8.9 carData_3.0-5 warp_0.2.0
## [6] gtools_3.9.4 bslib_0.5.0 BiocManager_1.30.21 highr_0.10 yulab.utils_0.1.8
## [11] renv_0.17.3 yaml_2.3.7 pillar_1.7.0 backports_1.4.1 lattice_0.21-8
## [16] glue_1.8.0 digest_0.6.37 ggsignif_0.6.4 colorspace_2.1-0 htmltools_0.5.5
## [21] pkgconfig_2.0.3 broom_1.0.5 purrr_0.3.5 tidytree_0.4.6 scales_1.2.1
## [26] tibble_3.1.7 generics_0.1.3 farver_2.1.1 car_3.1-2 ellipsis_0.3.2
## [31] cachem_1.0.8 withr_3.0.2 lazyeval_0.2.2 cli_3.6.3 magrittr_2.0.3
## [36] crayon_1.5.3 evaluate_1.0.1 fs_1.6.5 fansi_1.0.4 nlme_3.1-162
## [41] rstatix_0.7.2 tools_4.1.0 lifecycle_1.0.4 munsell_0.5.0 locfit_1.5-9.7
## [46] compiler_4.1.0 jquerylib_0.1.4 caTools_1.18.2 gridGraphics_0.5-1 rlang_1.1.4
## [51] grid_4.1.0 bitops_1.0-7 labeling_0.4.2 rmarkdown_2.22 gtable_0.3.3
## [56] abind_1.4-5 DBI_1.1.3 R6_2.5.1 gridExtra_2.3 knitr_1.43
## [61] fastmap_1.1.1 utf8_1.2.3 treeio_1.16.2 KernSmooth_2.23-21 ape_5.8
## [66] Rcpp_1.0.10 vctrs_0.4.1 tidyselect_1.2.0 xfun_0.39
Data
Processed data
# Load saved data from 01_Filter_Samples.Rmd
# Normalized by condition
if (!params$all.nonzero.matrix) {
filtered.samples <- readRDS(
file.path(params$input.samples.rds.path,
paste(params$species, "4_reps_data.conditions.rds", sep="_")))
} else {
filtered.samples <- readRDS(
file.path(params$input.samples.rds.path,
paste(params$species, "4_reps_data.conditions.nonzero.rds", sep="_")))
}
Tissue <- as.factor(filtered.samples$metadata$Tissue)
if ("Sex" %in% colnames(filtered.samples$metadata)) {
Sex <- as.factor(filtered.samples$metadata$Sex)
}
# Load jackknifed expression variability estimates from 02_Variability_Jackknife.Rmd
## Adjusted SD
if (!params$all.nonzero.matrix) {
jack.adj.sd <- readRDS(
file.path(params$input.variability.rds.path,
paste(params$species, "jack.adj.sd.rds", sep="_")))
## Residual SD
jack.resid.sd <- readRDS(
file.path(params$input.variability.rds.path,
paste(params$species, "jack.resid.sd.rds", sep="_")))
## Residual CV / Local (residual) CV
jack.resid.cv <- readRDS(
file.path(params$input.variability.rds.path,
paste(params$species, "jack.resid.cv.rds", sep="_")))
} else {
jack.adj.sd <- readRDS(
file.path(params$input.variability.rds.path,
paste(params$species, "jack.adj.sd.nonzero.rds", sep="_")))
## Residual SD
jack.resid.sd <- readRDS(
file.path(params$input.variability.rds.path,
paste(params$species, "jack.resid.sd.nonzero.rds", sep="_")))
## Residual CV / Local (residual) CV
jack.resid.cv <- readRDS(
file.path(params$input.variability.rds.path,
paste(params$species, "jack.resid.cv.nonzero.rds", sep="_")))
}
Main
Initial summary statistics
if ("Sex" %in% colnames(filtered.samples$metadata)) {
summary.stats <-
lapply(setNames(levels(Tissue), levels(Tissue)), function(t) {
lapply(setNames(levels(Sex), levels(Sex)), function(s) {
compute_gene_summary_stats(filtered.samples$log2.tmm.cpm.gf[[t]][[s]])
})
})
# Remove top and bottom x% of genes by expression level
summary.stats <-
lapply(setNames(levels(Tissue), levels(Tissue)), function(t) {
lapply(setNames(levels(Sex), levels(Sex)), function(s) {
summary.stats[[t]][[s]][
summary.stats[[t]][[s]]$Rank_Mean>params$min.percentile &
summary.stats[[t]][[s]]$Rank_Mean<params$max.percentile,]
})
})
# Remove missing conditions
summary.stats <-
lapply(setNames(levels(Tissue), levels(Tissue)), function(t) {
lapply(setNames(levels(Sex), levels(Sex)), function(s) {
summary.stats[[t]][[s]][complete.cases(summary.stats[[t]][[s]]),]
})
})
} else {
summary.stats <-
lapply(setNames(levels(Tissue), levels(Tissue)), function(t) {
compute_gene_summary_stats(filtered.samples$log2.tmm.cpm.gf[[t]])
})
# Remove top and bottom x% of genes by expression level
summary.stats <-
lapply(setNames(levels(Tissue), levels(Tissue)), function(t) {
summary.stats[[t]][
summary.stats[[t]]$Rank_Mean>params$min.percentile &
summary.stats[[t]]$Rank_Mean<params$max.percentile,]
})
}
Jackknife
Summarized results
# Adjusted SD
if ("Sex" %in% colnames(filtered.samples$metadata)) {
jack.adj.sd.summary <-
lapply(setNames(levels(Tissue), levels(Tissue)), function(t) {
lapply(setNames(levels(Sex), levels(Sex)), function(s) {
if (!is.null(jack.adj.sd[[t]][[s]])) {
jack.adj.sd[[t]][[s]]$summary %>%
tibble::rownames_to_column(., var="GeneID")
}
})
})
} else {
jack.adj.sd.summary <-
lapply(setNames(levels(Tissue), levels(Tissue)), function(t) {
jack.adj.sd[[t]]$summary %>%
tibble::rownames_to_column(., var="GeneID")
})
}
# Residual SD
if ("Sex" %in% colnames(filtered.samples$metadata)) {
jack.resid.sd.summary <-
lapply(setNames(levels(Tissue), levels(Tissue)), function(t) {
lapply(setNames(levels(Sex), levels(Sex)), function(s) {
if (!is.null(jack.resid.sd[[t]][[s]])) {
jack.resid.sd[[t]][[s]]$summary %>%
tibble::rownames_to_column(., var="GeneID")
}
})
})
} else {
jack.resid.sd.summary <-
lapply(setNames(levels(Tissue), levels(Tissue)), function(t) {
jack.resid.sd[[t]]$summary %>%
tibble::rownames_to_column(., var="GeneID")
})
}
# Residual CV
if ("Sex" %in% colnames(filtered.samples$metadata)) {
jack.resid.cv.summary <-
lapply(setNames(levels(Tissue), levels(Tissue)), function(t) {
lapply(setNames(levels(Sex), levels(Sex)), function(s) {
if (!is.null(jack.resid.cv[[t]][[s]])) {
jack.resid.cv[[t]][[s]]$summary %>%
tibble::rownames_to_column(., var="GeneID")
}
})
})
} else {
jack.resid.cv.summary <-
lapply(setNames(levels(Tissue), levels(Tissue)), function(t) {
jack.resid.cv[[t]]$summary %>%
tibble::rownames_to_column(., var="GeneID")
})
}
Protein-coding vs lncRNA
Split by gene biotype
jack.resid.cv.biotype <- list()
if ("Sex" %in% colnames(filtered.samples$metadata)) {
jack.resid.cv.biotype$coding <-
lapply(setNames(levels(Tissue), levels(Tissue)), function(t) {
lapply(setNames(levels(Sex), levels(Sex)), function(s) {
jack.resid.cv[[t]][[s]]$summary[
rownames(jack.resid.cv[[t]][[s]]$summary) %in%
biotype$Gene.stable.ID[biotype$Gene.type=="protein_coding"],]
})
})
jack.resid.cv.biotype$lncrna <-
lapply(setNames(levels(Tissue), levels(Tissue)), function(t) {
lapply(setNames(levels(Sex), levels(Sex)), function(s) {
jack.resid.cv[[t]][[s]]$summary[
rownames(jack.resid.cv[[t]][[s]]$summary) %in%
biotype$Gene.stable.ID[biotype$Gene.type %in% c("lncRNA", "lincRNA")],]
})
})
} else {
jack.resid.cv.biotype$coding <-
lapply(setNames(levels(Tissue), levels(Tissue)), function(t) {
jack.resid.cv[[t]]$summary[
rownames(jack.resid.cv[[t]]$summary) %in%
biotype$Gene.stable.ID[biotype$Gene.type=="protein_coding"],]
})
jack.resid.cv.biotype$lncrna <-
lapply(setNames(levels(Tissue), levels(Tissue)), function(t) {
jack.resid.cv[[t]]$summary[
rownames(jack.resid.cv[[t]]$summary) %in%
biotype$Gene.stable.ID[biotype$Gene.type %in% c("lncRNA", "lincRNA")],]
})
}
Bind jackknife results across all conditions
if ("Sex" %in% colnames(filtered.samples$metadata)) {
jack.resid.cv.bind <-
lapply(setNames(levels(Tissue), levels(Tissue)), function(t) {
lapply(setNames(levels(Sex), levels(Sex)), function(s) {
if (!is.null(jack.resid.cv[[t]][[s]])) {
jack.resid.cv[[t]][[s]]$summary %>%
tibble::rownames_to_column(., var="GeneID")
}
}) %>%
dplyr::bind_rows(.id="Sex")
}) %>%
dplyr::bind_rows(.id="Tissue")
} else {
jack.resid.cv.bind <-
lapply(setNames(levels(Tissue), levels(Tissue)), function(t) {
jack.resid.cv[[t]]$summary %>%
tibble::rownames_to_column(., var="GeneID")
}) %>%
dplyr::bind_rows(.id="Tissue")
}
jack.resid.cv.bind <- jack.resid.cv.bind %>%
dplyr::inner_join(biotype[,-1], by=c("GeneID"="Gene.stable.ID"))
colnames(jack.resid.cv.bind)[ncol(jack.resid.cv.bind)] <- "Biotype"
jack.resid.cv.bind <-
jack.resid.cv.bind[jack.resid.cv.bind$Biotype %in% c("protein_coding", "lncRNA", "lincRNA"),]
Compare variability rank distribution of protein-coding genes and
lncRNA
if ("Sex" %in% colnames(filtered.samples$metadata)) {
boxplot_var_rank_lncrna_vs_coding(jack.resid.cv.bind, sex=TRUE, method="cv")
} else {
boxplot_var_rank_lncrna_vs_coding(jack.resid.cv.bind, sex=FALSE, method="cv")
}

Save
if (!dir.exists(params$output.rds.path)) { dir.create(params$output.rds.path, showWarnings=FALSE) }
if (!params$all.nonzero.matrix) {
saveRDS(jack.resid.cv.bind,
file.path(params$output.rds.path,
paste(params$species, "jack.resid.cv.bind.rds", sep="_")))
saveRDS(jack.resid.cv.summary,
file.path(params$output.rds.path,
paste(params$species, "jack.resid.cv.summary.rds", sep="_")))
saveRDS(jack.resid.cv.biotype,
file.path(params$output.rds.path,
paste(params$species, "jack.resid.cv.biotype.rds", sep="_")))
} else {
saveRDS(jack.resid.cv.bind,
file.path(params$output.rds.path,
paste(params$species, "jack.resid.cv.bind.nonzero.rds", sep="_")))
saveRDS(jack.resid.cv.summary,
file.path(params$output.rds.path,
paste(params$species, "jack.resid.cv.summary.nonzero.rds", sep="_")))
saveRDS(jack.resid.cv.biotype,
file.path(params$output.rds.path,
paste(params$species, "jack.resid.cv.biotype.nonzero.rds", sep="_")))
}